Deep features for image retrieval - assignment


In [1]:
import graphlab

In [2]:
# Paths handed to graphlab.SFrame for the training and test image data.
TRAIN_DATA_DIR = 'image_train_data/'
TEST_DATA_DIR = 'image_test_data/'

# Load the training table first, then the test table.
image_train = graphlab.SFrame(TRAIN_DATA_DIR)
image_test = graphlab.SFrame(TEST_DATA_DIR)


[INFO] This non-commercial license of GraphLab Create is assigned to kgrodzicki@gmail.com and will expire on October 14, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-64 - Server binary: /usr/local/lib/python2.7/dist-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1448103269.log
[INFO] GraphLab Server Version: 1.6.1

In [4]:
# Summarise the training labels (length, missing values, most frequent items).
label_column = image_train['label']
label_column.sketch_summary()


Out[4]:
+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+

In [5]:
# Split the training images into one subset per label using boolean filters.
train_labels = image_train['label']
dogs = image_train[train_labels == 'dog']
automobiles = image_train[train_labels == 'automobile']
cats = image_train[train_labels == 'cat']
birds = image_train[train_labels == 'bird']

In [22]:
# Keep the first test row as a one-row slice; it is the query image used by
# the nearest-neighbour lookups further down.
cat_image = image_test[0:1]
cat_picture = cat_image['image']
cat_picture.show()



In [28]:
def _build_neighbor_model(reference_images):
    """Create a nearest-neighbours model over `reference_images`.

    The model is built on the 'deep_features' column and labels each
    reference row with its 'id' column.
    """
    return graphlab.nearest_neighbors.create(reference_images,
                                             features=['deep_features'],
                                             label='id')


# One model per label-specific training subset (same order as before:
# cat, automobile, dog, bird).
cat_model = _build_neighbor_model(cats)
car_model = _build_neighbor_model(automobiles)
dog_model = _build_neighbor_model(dogs)
bird_model = _build_neighbor_model(birds)


PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.

In [9]:
# Nearest cat training images for the query image; the result lists
# query_label, reference_label (the training image id), distance and rank.
cat_model.query(cat_image)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 13.6ms       |
PROGRESS: | Done         |         | 100         | 135.341ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[9]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]

In [44]:
# Show the cat training image closest to the query image.
# The id is taken from the rank-1 row of the query result rather than being
# copied by hand from an earlier printed table, and the query result is now
# actually used instead of being computed and discarded.
cat_neighbors = cat_model.query(cat_image)
nearest_cat_id = cat_neighbors[cat_neighbors['rank'] == 1]['reference_label'][0]
cats[cats['id'] == nearest_cat_id]['image'].show()



In [50]:
# Print the dog training images closest to the query image, then show the
# best match. The id comes from the rank-1 row of the result instead of a
# value transcribed from the printed table, so the cell stays correct if the
# data or the query image changes.
dog_neighbors = dog_model.query(cat_image)
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(dog_neighbors)
nearest_dog_id = dog_neighbors[dog_neighbors['rank'] == 1]['reference_label'][0]
dogs[dogs['id'] == nearest_dog_id]['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 10.585ms     |
PROGRESS: | Done         |         | 100         | 62.67ms      |
PROGRESS: +--------------+---------+-------------+--------------+
+-------------+-----------------+---------------+------+
| query_label | reference_label |    distance   | rank |
+-------------+-----------------+---------------+------+
|      0      |      16976      | 37.4642628784 |  1   |
|      0      |      13387      | 37.5666832169 |  2   |
|      0      |      35867      | 37.6047267079 |  3   |
|      0      |      44603      | 37.7065585153 |  4   |
|      0      |       6094      | 38.5113254907 |  5   |
+-------------+-----------------+---------------+------+
[5 rows x 4 columns]


In [36]:
# Mean distance from the query image to its first five cat neighbours.
cat_top_matches = cat_model.query(cat_image)
cat_top_matches[0:5]['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 16.485ms     |
PROGRESS: | Done         |         | 100         | 46.943ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[36]:
36.15573070978294

In [37]:
# Mean distance from the query image to its first five dog neighbours.
dog_top_matches = dog_model.query(cat_image)
dog_top_matches[0:5]['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 7.411ms      |
PROGRESS: | Done         |         | 100         | 72.725ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[37]:
37.77071136184157

In [38]:
# Same cat-model query for the same image as run earlier in the notebook;
# displayed again to read off the reference_label of the best match.
cat_model.query(cat_image)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 7.315ms      |
PROGRESS: | Done         |         | 100         | 56.16ms      |
PROGRESS: +--------------+---------+-------------+--------------+
Out[38]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]

In [40]:
# 16289 is the reference_label of the rank-1 row in the cat query output.
closest_cat = cats[cats['id'] == 16289]
closest_cat['image'].show()



In [53]:
# Split the test images into one subset per label using boolean filters.
test_labels = image_test['label']
image_test_dog = image_test[test_labels == 'dog']
image_test_automobiles = image_test[test_labels == 'automobile']
image_test_cat = image_test[test_labels == 'cat']
image_test_bird = image_test[test_labels == 'bird']

In [54]:
# For every dog test image, find its single nearest neighbour (k=1) among the
# cat training images.
# NOTE(review): dog_cat_neighbors is not referenced again in the visible
# cells; the same query is re-run when dog_distances is built.
dog_cat_neighbors = cat_model.query(image_test_dog, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 8
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 64000   | 12.5737     | 883.899ms    |
PROGRESS: | Done         | 509000  | 100         | 1.15s        |
PROGRESS: +--------------+---------+-------------+--------------+

In [59]:
# Distance from each dog test image to its single nearest neighbour (k=1) in
# each label-specific model. Queries run in the order dog, cat, car, bird.
nearest_dog = dog_model.query(image_test_dog, k=1)['distance']
nearest_cat = cat_model.query(image_test_dog, k=1)['distance']
nearest_car = car_model.query(image_test_dog, k=1)['distance']
nearest_bird = bird_model.query(image_test_dog, k=1)['distance']

# One column per model.
# NOTE(review): this assumes all four results list the query images in the
# same row order - confirm.
dog_distances = graphlab.SFrame({'dog-dog': nearest_dog,
                                 'dog-cat': nearest_cat,
                                 'dog-car': nearest_car,
                                 'dog-bird': nearest_bird})


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 8
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 64000   | 12.5737     | 432.271ms    |
PROGRESS: | Done         | 509000  | 100         | 507.797ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 8
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 63000   | 12.3772     | 797.001ms    |
PROGRESS: | Done         | 509000  | 100         | 895.339ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 8
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 63000   | 12.3772     | 437.886ms    |
PROGRESS: | Done         | 509000  | 100         | 473.275ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 8
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 60000   | 12.5523     | 366.859ms    |
PROGRESS: | Done         | 478000  | 100         | 459.878ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [60]:
# Display the head of the per-model nearest-neighbour distance table.
dog_distances


Out[60]:
dog-bird dog-car dog-cat dog-dog
41.7538647304 41.9579761457 36.4196077068 33.4773590373
41.3382958925 46.0021331807 38.8353268874 32.8458495684
38.6157590853 42.9462290692 36.9763410854 35.0397073189
37.0892269954 41.6866060048 34.5750072914 33.9010327697
38.272288694 39.2269664935 34.778824791 37.4849250909
39.1462089236 40.5845117698 35.1171578292 34.945165344
40.523040106 45.1067352961 40.6095830913 39.0957278345
38.1947918393 41.3221140974 39.9036867306 37.7696131032
40.1567131661 41.8244654995 38.0674700168 35.1089144603
45.5597962603 45.4976929401 42.7258732951 43.2422832585
[1000 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [62]:
def is_dog_correct(row):
    """Return 1 if the dog model gives the strictly smallest distance, else 0.

    `row` is indexed by the keys 'dog-dog', 'dog-cat', 'dog-car' and
    'dog-bird'. The result is 1 only when row['dog-dog'] is strictly less
    than each of the other three values; a tie counts as 0.
    """
    dog_distance = row['dog-dog']
    other_keys = ('dog-cat', 'dog-car', 'dog-bird')
    # all() stops at the first failing comparison, matching the original
    # left-to-right `and` chain.
    beats_every_other = all(dog_distance < row[key] for key in other_keys)
    return 1 if beats_every_other else 0

In [67]:
# Spot-check the helper on a single row. Integer indexing hands over one row
# as a dict of scalar values, which is the shape is_dog_correct receives from
# SFrame.apply. A slice such as dog_distances[5:6] is a one-row SFrame, so the
# comparisons inside the helper would operate on columns rather than numbers.
is_dog_correct(dog_distances[5])


Out[67]:
1

In [69]:
# Apply is_dog_correct to every row and store the 1/0 result as a new column.
classified_correct = dog_distances.apply(is_dog_correct)
dog_distances['classified_correct'] = classified_correct

In [75]:
# Fraction of dog test images for which is_dog_correct returned 1.
num_correct = dog_distances['classified_correct'].sum()
num_test_dogs = len(image_test_dog)
# float() keeps the division from truncating under Python 2.
num_correct / float(num_test_dogs)


Out[75]:
0.678

In [ ]: